In [1]:
%reload_ext watermark
%watermark -p networkx
In [2]:
import networkx as nx
from networkx.algorithms.community import k_clique_communities, girvan_newman
import matplotlib.pyplot as plt
%matplotlib inline
In [3]:
GA = nx.read_gexf('../data/ga_graph.gexf')
The Girvan–Newman algorithm detects communities by progressively removing edges from the original graph. The algorithm removes the “most valuable” edge, traditionally the edge with the highest betweenness centrality, at each step. As the graph breaks down into pieces, the tightly knit community structure is exposed.
NetworkX's girvan_newman
returns:
Iterator over tuples of sets of nodes in G. Each set of nodes is a community, each tuple is a sequence of communities at a particular level of the algorithm.
In [4]:
gn_comm = girvan_newman(GA)
In [5]:
first_iteration_comm = tuple(sorted(c) for c in next(gn_comm))
In [6]:
dict(enumerate(first_iteration_comm))
Out[6]:
In [7]:
def map_communities(G, communities):
"""Return a mapping of community membership from a community set tuple"""
community_map = {}
for node in G.nodes():
for i, comm in enumerate(communities):
if node in comm:
community_map[node] = i
if community_map.get(node, None) is None:
community_map[node] = None
return community_map
In [8]:
from helpers import create_color_map
In [9]:
community_map = map_communities(GA, first_iteration_comm)
nx.set_node_attributes(GA, 'community', community_map)
node_colors, color_map, palette = create_color_map(GA, 'community')
In [10]:
nx.draw(GA, node_color=node_colors, with_labels=True)
In [11]:
second_comm = tuple(sorted(c) for c in next(gn_comm))
community_map_2 = map_communities(GA, second_comm)
nx.set_node_attributes(GA, 'community two', community_map_2)
node_colors, color_map, palette = create_color_map(GA, 'community two')
In [12]:
nx.draw(GA, node_color=node_colors, with_labels=True)
a clique is a subset of vertices of an undirected graph such that its induced subgraph is complete; that is, every two distinct vertices in the clique are adjacent.
A k-clique community is the union of all cliques of size k that can be reached through adjacent (sharing k-1 nodes) k-cliques.
In [13]:
k_clique = k_clique_communities(GA, 2)
In [14]:
dict(enumerate(k_clique))
Out[14]:
In [15]:
k_clique = k_clique_communities(GA, 3)
dict(enumerate(k_clique))
Out[15]:
In [16]:
print("Percent of ALL edges that could exist: %0.2f" % (nx.density(GA) * 100))
In [17]:
Karate = nx.karate_club_graph()
In [18]:
gn_comm = girvan_newman(Karate)
first_comm = tuple(sorted(c) for c in next(gn_comm))
community_map = map_communities(Karate, first_comm)
nx.set_node_attributes(Karate, 'community gn', community_map)
node_colors, color_map, palette = create_color_map(Karate, 'community gn')
In [19]:
nx.draw(Karate, node_color=node_colors, with_labels=True)
In [20]:
k_clique = k_clique_communities(Karate, 3)
k_clique_comm = [list(community) for community in k_clique]
community_map = map_communities(Karate, k_clique_comm)
nx.set_node_attributes(Karate, 'community k-clique', community_map)
node_colors, color_map, palette = create_color_map(Karate, 'community k-clique')
In [21]:
nx.draw(Karate, node_color=node_colors, with_labels=True)
In [22]:
import pandas as pd
In [23]:
club_community = [Karate.node[node] for node in Karate.nodes()]
club_df = pd.DataFrame(club_community)
In [24]:
pd.crosstab(club_df['club'], club_df['community gn'])
Out[24]:
In [25]:
pd.crosstab(club_df['club'], club_df['community k-clique'])
Out[25]: